/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection

Created on: 12/28/2022
Last Modified on: 2/20/2024

Description: This program generates the primary screener-level screen-in rates,
placement rates, and outcome rates, as well as standard errors for these estimates.

It takes as input a child-by-call dataset with the main sample restrictions
outlined in our paper. Using this dataset, the program then estimates
screener-specific rates and standard errors using a linear adjustment to
account for randomization strata, which we discuss in the paper.

Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

**************************
**(0) SETUP
**************************
* Start from a clean session: empty data in memory, no paging of output, no open log.
clear
set more off
* Clear every global macro before the directory globals are defined below.
* (The keyword for "all macros" is _all; "all" would only name a single global.)
macro drop _all
capture log close
* Seed is fixed; no command in the visible script draws random numbers.
set seed 02042023

*Set directories (values intentionally left blank; see the confidentiality note in the file header)
* Each global is used as a path prefix in the use/save/merge calls further down.
global cleandata 
global tmpdata 
global rawdata 
global output 


*************************
**(1) GENERATE SCREENERS' SCREEN-IN AND MALTREATMENT RATES, CONDITIONAL ON CALL BEING SCREENED-OUT 
*************************
**Start with the universe of hotline calls in the analysis sample (after making the main sample restrictions)
* clear (not replace) is the documented option of use, matching the use call in section (4)
use "${tmpdata}all_hotline_calls_main_restrictions_qje.dta", clear 

**Drop screeners with fewer than 100 calls and investigators with fewer than 200 investigations
* NOTE(review): the condition below keeps screeners with strictly more than 100 calls,
* so a screener with exactly 100 calls is dropped, while the investigator rule keeps
* exactly 200. Confirm which threshold is intended.
bysort screener: gen n = _N 
keep if n >100 

cap drop n 
bysort worker_id: gen n = _N if screened==1
drop if n<200 & screened==1 //note that worker_id is set to missing when screened == 0//

**Additional sample restrictions needed for the analysis (sexual abuse cases are not randomly assigned)
drop if sexab==1 

**Zip code is a key variable in the analysis 
* Recode the string "NA" to "." so destring can convert the variable to numeric
replace zipcode_vic="." if zipcode_vic=="NA"
destring zipcode_vic, replace 
drop if zipcode_vic==. & screened==1

**Keep only calls (and investigations) that are not within 365 days for the same child 
* diff is computed once, before any rows are dropped, so each call is compared with
* the immediately preceding call for the same child in the sorted data
sort childpartyid cw_date_stata intake_id, stable
by childpartyid: gen diff = cw_date_stata[_n] - cw_date_stata[_n-1]
order diff 
drop if diff<365 & diff!=.

**Generate screener unique numeric identifiers 
drop screener 
egen screener = group(scrnr_first_nm scrnr_last_nm) //162 screeners 

**Generate rotation variable: exact day X shift fixed effect 
* Characters 12-16 of complaint_dttm are taken as the time of day, and the first two of those as the hour
gen cw_time = substr(complaint_dttm,12,5)
order cw_time 

gen cw_hour = substr(cw_time,1,2)
order cw_hour 
destring cw_hour, replace 

tab cw_hour
* Hour 0 is recoded to 24 so that it falls into shift 2 below
replace cw_hour=24 if cw_hour==0 

* Shift 1: hours 8-16; shift 2: hours 17-24; shift 3: hours 1-7
gen shift=. 
replace shift=1 if cw_hour >=8 & cw_hour<=16 
replace shift=2 if cw_hour >16 & cw_hour<=24 
replace shift=3 if cw_hour >=1 & cw_hour<=7

egen rotation = group(cw_date_stata shift)

**Rename screener id as "worker_id" to change code minimally, relative to the investigator analysis
drop worker_id 
rename (screener) (worker_id)

**Globals for variable lists
global fixedeffect = "rotation"

**Keep only variables of interest 
keep worker_id black white screened inv6m $fixedeffect childpartyid fc
sum worker_id black white screened inv6m $fixedeffect childpartyid fc

* Consecutive screener index used for the factor variables and the lincom loops in later sections.
* The level list is stored directly by levelsof, so it is never passed through a
* string expression (which can truncate long lists on older Stata versions).
egen grpworker_id = group(worker_id)
levelsof grpworker_id, local(levels)
preserve 
**Screener-level screen-in rates and inv6m rates (the latter among screened-out calls), pooled and by race
* One pass per outcome; each pass fills the estimate and standard-error variables for every screener
foreach outcome in screened inv6m {
	* Estimation sample: all calls for the screen-in outcome, screened-out calls for any other outcome
	local restrict ""
	if "`outcome'" != "screened" {
		local restrict "if screened == 0"
	}
	
	* ---- Pooled regression on screener indicators, absorbing the rotation fixed effect ----
	qui: reghdfe `outcome' i.grpworker_id `restrict', resid absorb($fixedeffect)
	
	* Average absorbed fixed effect: (xb + absorbed part) minus xb, averaged over non-missing rows
	predict fe_xbd, xbd
	predict fe_xb, xb
	gen fe_part = fe_xbd - fe_xb 
	sum fe_part 
	local fe_mean = r(mean)
	
	gen base_`outcome' = .
	gen se_`outcome' = .
	
	* Screener-specific level = constant + screener coefficient + average fixed effect.
	* Screeners for which lincom returns an error keep missing values.
	qui {
		foreach w of local levels {
			capture lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace base_`outcome' = r(estimate) if grpworker_id == `w'
				replace se_`outcome' = r(se) if grpworker_id == `w'
			}
		}
	}
	drop fe_xbd fe_xb fe_part
	
	* ---- By race: outcome restricted to Black or white children, screener-by-Black interaction added ----
	qui {
		gen race_y = `outcome' if black == 1 | white == 1
		reghdfe race_y i.grpworker_id i.grpworker_id#i.black `restrict', resid absorb($fixedeffect)
	}
	
	* Average absorbed fixed effect for the by-race regression
	predict fe_xbd, xbd
	predict fe_xb, xb
	gen fe_part = fe_xbd - fe_xb 
	sum fe_part 
	local fe_mean = r(mean)
	
	gen b_`outcome' = .
	gen b_se_`outcome' = .
	gen w_`outcome' = .
	gen w_se_`outcome' = .
	
	qui {
		foreach w of local levels {
			* White children: constant + screener coefficient
			capture lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace w_`outcome' = r(estimate) if grpworker_id == `w'
				replace w_se_`outcome' = r(se) if grpworker_id == `w'
			}
			* Black children: additionally add the screener-by-Black interaction
			capture lincom _cons + _b[`w'.grpworker_id] + _b[`w'.grpworker_id#1.black] + `fe_mean'
			if _rc == 0 {
				replace b_`outcome' = r(estimate) if grpworker_id == `w'
				replace b_se_`outcome' = r(se) if grpworker_id == `w'
			}
		}
	}
	drop race_y fe_xbd fe_xb fe_part
}


*----------------------------------------------------------------------*
* Collapse to one row per screener and save
*----------------------------------------------------------------------*
* Per-screener row count and totals of the black and white indicators
bysort worker_id: gen count_inv = _N
bysort worker_id: egen count_black = total(black)
bysort worker_id: egen count_white = total(white)

* Retain the estimates, their standard errors, the counts and the identifiers
keep worker_id grpworker_id count_inv count_black count_white base_* se_* b_* w_*
duplicates drop worker_id, force
drop if worker_id == .

save "${cleandata}screener_screening_maltreatment_rates_qje.dta", replace
restore 


*************************
**(2) GENERATE SCREENERS' PLACEMENT AND MALTREATMENT RATES, CONDITIONAL ON CALL NOT BEING PLACED
*************************
preserve
* One pass per outcome; each pass fills the estimate and standard-error variables for every screener
foreach outcome in fc inv6m {
	* Estimation sample: all calls for the placement outcome (fc), calls with fc == 0 for any other outcome
	local restrict ""
	if "`outcome'" != "fc" {
		local restrict "if fc == 0"
	}
	
	* ---- Pooled regression on screener indicators, absorbing the rotation fixed effect ----
	qui: reghdfe `outcome' i.grpworker_id `restrict', resid absorb($fixedeffect)
	
	* Average absorbed fixed effect: (xb + absorbed part) minus xb, averaged over non-missing rows
	predict fe_xbd, xbd
	predict fe_xb, xb
	gen fe_part = fe_xbd - fe_xb 
	sum fe_part 
	local fe_mean = r(mean)
	
	gen base_`outcome' = .
	gen se_`outcome' = .
	
	* Screener-specific level = constant + screener coefficient + average fixed effect.
	* Screeners for which lincom returns an error keep missing values.
	qui {
		foreach w of local levels {
			capture lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace base_`outcome' = r(estimate) if grpworker_id == `w'
				replace se_`outcome' = r(se) if grpworker_id == `w'
			}
		}
	}
	drop fe_xbd fe_xb fe_part
	
	* ---- By race: outcome restricted to Black or white children, screener-by-Black interaction added ----
	qui {
		gen race_y = `outcome' if black == 1 | white == 1
		reghdfe race_y i.grpworker_id i.grpworker_id#i.black `restrict', resid absorb($fixedeffect)
	}
	
	* Average absorbed fixed effect for the by-race regression
	predict fe_xbd, xbd
	predict fe_xb, xb
	gen fe_part = fe_xbd - fe_xb 
	sum fe_part 
	local fe_mean = r(mean)
	
	gen b_`outcome' = .
	gen b_se_`outcome' = .
	gen w_`outcome' = .
	gen w_se_`outcome' = .
	
	qui {
		foreach w of local levels {
			* White children: constant + screener coefficient
			capture lincom _cons + _b[`w'.grpworker_id] + `fe_mean'
			if _rc == 0 {
				replace w_`outcome' = r(estimate) if grpworker_id == `w'
				replace w_se_`outcome' = r(se) if grpworker_id == `w'
			}
			* Black children: additionally add the screener-by-Black interaction
			capture lincom _cons + _b[`w'.grpworker_id] + _b[`w'.grpworker_id#1.black] + `fe_mean'
			if _rc == 0 {
				replace b_`outcome' = r(estimate) if grpworker_id == `w'
				replace b_se_`outcome' = r(se) if grpworker_id == `w'
			}
		}
	}
	drop race_y fe_xbd fe_xb fe_part
}

*----------------------------------------------------------------------*
* Collapse to one row per screener and save
*----------------------------------------------------------------------*
* Per-screener row count and totals of the black and white indicators
bysort worker_id: gen count_inv = _N
bysort worker_id: egen count_black = total(black)
bysort worker_id: egen count_white = total(white)

* Retain the estimates, their standard errors, the counts and the identifiers
keep worker_id grpworker_id count_inv count_black count_white base_* se_* b_* w_*
duplicates drop worker_id, force
drop if worker_id == .

save "${cleandata}screener_placement_maltreatment_rates_qje.dta", replace
restore 

*************************
**(3) GENERATE CLUSTERED SES FOR SCREENERS' PLACEMENT AND MALTREATMENT RATES, CONDITIONAL ON CALL NOT BEING PLACED
*************************
preserve
* Same regressions as section (2), but with standard errors clustered on
* childpartyid and worker_id; only the standard errors are stored here.
foreach var in fc inv6m {
	* Estimation sample: all calls for the placement outcome (fc), calls with fc == 0 for any other outcome
	local restrict ""
	if "`var'" != "fc" {
		local restrict "if fc == 0"
	}
	
	* ---- Pooled regression ----
	qui: reghdfe `var' i.grpworker_id `restrict', resid absorb($fixedeffect) cluster(childpartyid worker_id)
	
	* retrieve average of fixed effect
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb 
	sum d 
	local exp = r(mean)
	
	gen twse_`var' = .
	
	* Standard error of constant + screener coefficient + average fixed effect.
	* Screeners for which lincom returns an error keep a missing value.
	qui {
		foreach i of local levels {
			capture lincom _cons + _b[`i'.grpworker_id] + `exp'
			if _rc == 0 {
				replace twse_`var' = r(se) if grpworker_id == `i'
			}
		}
	}
	drop xbd xb d
	
	* ---- By race (black and white), same sample rule as above ----
	qui {
		gen x = `var' if black == 1 | white == 1
		reghdfe x i.grpworker_id i.grpworker_id#i.black `restrict', resid absorb($fixedeffect) cluster(childpartyid worker_id)
	}
	
	* retrieve average of fixed effect
	predict xbd, xbd
	predict xb, xb
	gen d = xbd - xb 
	sum d 
	local exp = r(mean)
	
	gen b_twse_`var' = .
	gen w_twse_`var' = .
	
	qui {
		* White children
		foreach i of local levels {
			capture lincom _cons + _b[`i'.grpworker_id] + `exp'
			if _rc == 0 {
				replace w_twse_`var' = r(se) if grpworker_id == `i'
			}
		}
		* Black children: additionally add the screener-by-Black interaction
		foreach i of local levels {
			capture lincom _cons + _b[`i'.grpworker_id] + _b[`i'.grpworker_id#1.black] + `exp'
			if _rc == 0 {
				replace b_twse_`var' = r(se) if grpworker_id == `i'
			}
		}
	}
	drop x xbd xb d
}

*----------------------------------------------------------------------*
* Save screener-level dataset 
*----------------------------------------------------------------------*
* The per-screener count variables that used to be generated here were not in
* the keep list below and were therefore discarded immediately; they are no
* longer computed. The saved dataset is unchanged.

* Keep relevant vars
keep b_* w_* twse_* worker_id 
duplicates drop worker_id, force 	
drop if worker_id == .

save "${cleandata}screener_placement_maltreatment_se_qje.dta", replace
restore

*************************
**(4) GENERATE FINAL SCREENER PLACEMENT AND SUBSEQUENT MALTREATMENT DATASET 
*************************
* Start from the section (2) rates and attach the clustered standard errors from section (3).
* keep(1 3) retains every screener in the rates file, matched or not; the _merge
* indicator created by merge stays in the saved dataset.
use "${cleandata}screener_placement_maltreatment_rates_qje.dta", clear 
merge 1:1 worker_id using "${cleandata}screener_placement_maltreatment_se_qje.dta", keepus(twse* b_twse* w_twse*) keep(1 3)

* Y_b / Y_w: by-race inv6m estimates; D_b / D_w: by-race fc estimates.
* The unclustered by-race standard errors get an _fe_ infix in their names.
rename (b_inv6m w_inv6m b_se_inv6m w_se_inv6m b_fc w_fc b_se_fc w_se_fc) ///
(Y_b Y_w b_se_fe_inv6m w_se_fe_inv6m D_b D_w b_se_fe_fc w_se_fe_fc)

* Convert each D to one minus the fc estimate, bound it to [0, 1], and store its square.
foreach x in D_w D_b {
		replace `x' = 1-`x'
 		replace `x'=0 if `x'<0
		* Missing values compare as larger than any number, so they must be excluded
		* explicitly; otherwise a screener with no estimate would be assigned 1.
 		replace `x'=1 if `x'>1 & !missing(`x')
		gen `x'2 = `x'^2
}

save "${cleandata}screener_placement_maltreatment_rates_se_qje.dta", replace